import os
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rcParams
import pandas as pd
from pathos.multiprocessing import Pool
from linearmodels import OLS
import matplotlib.ticker as mtick
import datetime
from jm.library.data_helper import filter_data, date_to_str, is_between, \
    is_weakly_less_than, is_weakly_greater_than, is_less_than
from jm.library.likelihood import Likelihood
from jm.library.additional_helpers import plot_over_time, payment_cols, priority_cols, action_cols, \
    relative_payment_cols, fwdiff_payment_cols, relative_fwdiff_payment_cols, action_nomedida_cols
import warnings
warnings.filterwarnings("ignore")

rcParams['figure.figsize'] = (12.0, 4.5)
rcParams['lines.linewidth'] = 3
rcParams['font.size'] = 18

np.random.seed(1234)

color_by_num = plt.rcParams['axes.prop_cycle'].by_key()['color']

def to_percent(y_value, _):
    return f"{100 * y_value:.1f}%"


df_status = pd.read_csv('data/jesus_maria_status_deidentified.csv')


def is_payment_above_thresh_data(df, thresh=.5, ax=None):
    this_df = df[relative_payment_cols] > thresh
    this_df.columns = Likelihood.event_dates
    return this_df


def is_payment_above_thresh(df, thresh=.5, ax=None, linestyle='solid', color=None, figsize=None):
    this_df = is_payment_above_thresh_data(df, thresh, ax)
    plot_kwargs = {'linestyle': linestyle, 'color': color, 'figsize': figsize}
    plot_kwargs = {k: v for k, v in plot_kwargs.items() if v is not None}
    ax = this_df.mean(axis=0).plot(**plot_kwargs)
    plt.ylabel('Tax-Payers s.t. Payments > 50% Tax Due')
    return ax 


def generate_95p_ci(df, var):
    df_t = is_payment_above_thresh_data(df[df[var]==1])
    df_t['on'] = 1
    df_c = is_payment_above_thresh_data(df[df[var]==0])
    df_c['on'] = 0
    df = pd.concat([df_t, df_c])

    df.columns = ['time' + str(j) for j in range(0,len(Likelihood.event_dates))] + ['on']
    df['off'] = 1 - df['on']
    list_upperCI_control = []
    list_lowerCI_control = []
    list_upperCI_treatment = []
    list_lowerCI_treatment = []
    list_mean_control = []
    list_mean_treatment = []
    for i in range(0, len(Likelihood.event_dates)):
        formula = 'time' + str(i) + ' ~ on + off - 1'
        model = OLS.from_formula(formula, data=df)
        res = model.fit(cov_type='robust')
        list_upperCI_control.append(res.conf_int(level=0.95).loc['off']['upper'])
        list_lowerCI_control.append(res.conf_int(level=0.95).loc['off']['lower'])
        list_upperCI_treatment.append(res.conf_int(level=0.95).loc['on']['upper'])
        list_lowerCI_treatment.append(res.conf_int(level=0.95).loc['on']['lower'])
        list_mean_control.append(res.params['off'])
        list_mean_treatment.append(res.params['on'])

    upperCI_control = pd.DataFrame([list_upperCI_control], columns=Likelihood.event_dates)
    lowerCI_control = pd.DataFrame([list_lowerCI_control], columns=Likelihood.event_dates)
    upperCI_treatment = pd.DataFrame([list_upperCI_treatment], columns=Likelihood.event_dates)
    lowerCI_treatment = pd.DataFrame([list_lowerCI_treatment], columns=Likelihood.event_dates)
    mean_control = pd.DataFrame([list_mean_control], columns=Likelihood.event_dates)
    mean_treatment = pd.DataFrame([list_mean_treatment], columns=Likelihood.event_dates)
    
    return mean_treatment, mean_control, upperCI_control, lowerCI_control, upperCI_treatment, lowerCI_treatment


df_status[relative_payment_cols] = df_status[payment_cols].divide(
                df_status['total_due'], axis=0)


df_status[relative_fwdiff_payment_cols] = df_status[fwdiff_payment_cols].divide(
    df_status['total_due'], axis=0)


df_status['endo_type'] = np.nan
df_status.loc[
    df_status['assignment_to_treatment'] == 1, 'endo_type'] = (
    df_status['score_endo_covariates'] == df_status['effective_score'])

endo_df = df_status[(df_status['assignment_to_treatment'] == 0) | (df_status['endo_type'] == 1)]
exo_df = df_status[(df_status['assignment_to_treatment'] == 0) | (df_status['endo_type'] == 0)]


df_treatment = filter_data(df_status, assignment_to_treatment=1)
df_control = filter_data(df_status, assignment_to_treatment=0)


plt.clf()
ax = plot_over_time(filter_data(df_status, assignment_to_treatment=1), normalize=1e6)
plot_over_time(filter_data(df_status, assignment_to_treatment=0), normalize=1e6, ax=ax, linestyle='dashed')
plt.legend(['treatment', 'control'])
plt.ylabel('cumulative taxes collected \n (Millions of Soles)')

plt.tight_layout()
plt.savefig('figs/fig7.pdf')


df_status['weeks_from_G1_to_promise'] = df_status['days_from_G1_to_promise']/7


plt.clf()
fig, ax = plt.subplots(figsize=(12, 4.5))
ax = plot_over_time(
    filter_data(df_status, assignment_to_treatment=1, total_due=is_weakly_greater_than(1000)),
    col_fmt='relative_payments_by_{}', func=np.mean)
plot_over_time(
    filter_data(df_status, assignment_to_treatment=0, total_due=is_weakly_greater_than(1000)), 
    col_fmt='relative_payments_by_{}', func=np.mean, ax=ax, linestyle='dashed')
plt.ylim(0, 2)
yticks = ax.get_yticks()
for tick in yticks:
    ax.axhline(y=tick, linestyle='--', linewidth=0.5, color='gray')


plt.legend(['treatment', 'control'])
plt.ylabel('Mean Relative Payments')

plt.tight_layout()
plt.savefig('figs/figOA1a.pdf')


plt.clf()
fig, ax = plt.subplots(figsize=(12, 4.5))


ax = plot_over_time(
    filter_data(df_status, assignment_to_treatment=1, total_due=is_less_than(1000)),
    col_fmt='relative_payments_by_{}', func=np.mean)
plot_over_time(
    filter_data(df_status, assignment_to_treatment=0, total_due=is_less_than(1000)), 
    col_fmt='relative_payments_by_{}', func=np.mean, ax=ax, linestyle='dashed')
plt.ylim(0, 2)
yticks = ax.get_yticks()
for tick in yticks:
    ax.axhline(y=tick, linestyle='--', linewidth=0.5, color='gray')

plt.legend(['treatment', 'control'], loc='upper left')
plt.ylabel('Mean Relative Payments')

plt.tight_layout()
plt.savefig('figs/figOA1b.pdf')


p = df_status.groupby('assignment_to_treatment')['payments_by_2021-09-06'].sum()
with open("numbers/sec1.txt", "w") as text_file:
    print("Change in tax collection from control to treatment: " + str(p[1]/p[0]-1), file=text_file)


def generate_sample(df, covariate, sample_size, num_quantiles, replace=False):
    np.random.seed(0)
    batch1 = df['batch1_top'] + df['batch1_bottom'] > 0

    quantiles = df.loc[batch1, covariate].quantile([i/num_quantiles for i in range(num_quantiles+1)])
    quantiles.iloc[-1] += 1e-6

    this_df_control = df.loc[df.assignment_to_treatment==0, :]

    indices = []
    for i in range(num_quantiles):
        q_prev, q_next = quantiles[i/num_quantiles], quantiles[(i+1)/num_quantiles]
        is_valid = this_df_control[covariate].between(q_prev, q_next, inclusive='left')
        valid_sample = list(this_df_control.loc[is_valid].index)
        indices += list(np.random.choice(valid_sample, sample_size, replace=replace))

    selected_sample = list(set(list(df.loc[batch1].index) + indices))
    return selected_sample

selected_sample = generate_sample(df_status, 'score_endo_covariates', 60, 5)

df_status_b1 = df_status.loc[selected_sample, :]


[mean_treatment, mean_control, 
 upperCI_control, lowerCI_control, 
 upperCI_treatment, lowerCI_treatment] = generate_95p_ci(df_status_b1, 'assignment_to_treatment')

plt.clf()
ax = mean_treatment.iloc[0].plot(figsize=(12, 5))
mean_control.iloc[0].plot(ax = ax, linestyle='dashed')
ax.fill_between(Likelihood.event_dates,
                lowerCI_treatment.iloc[0],
                upperCI_treatment.iloc[0], color=color_by_num[0], alpha=0.5)
ax.fill_between(Likelihood.event_dates,
                lowerCI_control.iloc[0],
                upperCI_control.iloc[0], color=color_by_num[1], alpha=0.5)
ax.yaxis.set_major_formatter(mtick.FuncFormatter(to_percent))

plt.legend(['priority G1', 'comparable control'])
plt.ylabel('Share of Tax-payers with \n Payments / Tax-Due > .5', fontsize=16)
plt.tight_layout()
plt.savefig('figs/fig9.pdf')


covariate = 'prob_repayment_endo_covariates'

quantiles = df_status_b1[covariate].quantile([1/3, 2/3])


df_status_b1['high_endo'] = np.nan
df_status_b1.loc[df_status_b1['prob_repayment_endo_covariates']<=quantiles[1/3], 'high_endo'] = 0
df_status_b1.loc[df_status_b1['prob_repayment_endo_covariates']>=quantiles[2/3], 'high_endo'] = 1

[mean_high, mean_low, 
 upperCI_low, lowerCI_low, 
 upperCI_high, lowerCI_high] = generate_95p_ci(df_status_b1, 'high_endo')


plt.clf()
ax = mean_low.iloc[0].plot(figsize=(11, 4))
mean_high.iloc[0].plot(ax = ax, linestyle='dashed')
ax.fill_between(Likelihood.event_dates,
                lowerCI_low.iloc[0],
                upperCI_low.iloc[0], color=color_by_num[0], alpha=0.5)
ax.fill_between(Likelihood.event_dates,
                lowerCI_high.iloc[0],
                upperCI_high.iloc[0], color=color_by_num[1], alpha=0.5)
ax.yaxis.set_major_formatter(mtick.FuncFormatter(to_percent))

plt.ylabel('Share of Tax-payers with \n Payments / Tax-Due > .5', fontsize=16)
plt.legend(['low prob (endo)', 'high prob (endo)'])
plt.tight_layout()
plt.savefig('figs/fig10a.pdf')


covariate = 'prob_repayment_exo_covariates'

quantiles = df_status_b1[covariate].quantile([1/3, 2/3])

df_status_b1['high_exo'] = np.nan
df_status_b1.loc[df_status_b1['prob_repayment_exo_covariates']<=quantiles[1/3], 'high_exo'] = 0
df_status_b1.loc[df_status_b1['prob_repayment_exo_covariates']>=quantiles[2/3], 'high_exo'] = 1


[mean_high, mean_low, 
 upperCI_low, lowerCI_low, 
 upperCI_high, lowerCI_high] = generate_95p_ci(df_status_b1, 'high_exo')


plt.clf()
ax = mean_low.iloc[0].plot(figsize=(11,4))
mean_high.iloc[0].plot(ax = ax, linestyle='dashed')
ax.fill_between(Likelihood.event_dates,
                lowerCI_low.iloc[0],
                upperCI_low.iloc[0], color=color_by_num[0], alpha=0.5)
ax.fill_between(Likelihood.event_dates,
                lowerCI_high.iloc[0],
                upperCI_high.iloc[0], color=color_by_num[1], alpha=0.5)
ax.yaxis.set_major_formatter(mtick.FuncFormatter(to_percent))

plt.ylabel('Share of Tax-payers with \n Payments / Tax-Due > .5', fontsize=16)
plt.legend(['low prob (exo)', 'high prob (exo)'])
plt.tight_layout()
plt.savefig('figs/fig10b.pdf')


covariate = 'total_due'

quantiles = df_status_b1[covariate].quantile([1/3, 2/3])


df_status_b1['high_due'] = np.nan
df_status_b1.loc[df_status_b1['total_due']<=quantiles[1/3], 'high_due'] = 0
df_status_b1.loc[df_status_b1['total_due']>=quantiles[2/3], 'high_due'] = 1



[mean_high, mean_low, 
 upperCI_low, lowerCI_low, 
 upperCI_high, lowerCI_high] = generate_95p_ci(df_status_b1, 'high_due')


plt.clf()
ax = mean_low.iloc[0].plot(figsize=(11,4))
mean_high.iloc[0].plot(ax = ax, linestyle='dashed')
ax.fill_between(Likelihood.event_dates,
                lowerCI_low.iloc[0],
                upperCI_low.iloc[0], color=color_by_num[0], alpha=0.5)
ax.fill_between(Likelihood.event_dates,
                lowerCI_high.iloc[0],
                upperCI_high.iloc[0], color=color_by_num[1], alpha=0.5)
ax.yaxis.set_major_formatter(mtick.FuncFormatter(to_percent))

plt.ylabel('Share of Tax-payers with \n Payments / Tax-Due > .5', fontsize=16)
plt.legend(['low total due', 'high total due'])
plt.tight_layout()
plt.savefig('figs/fig10c.pdf')


plt.clf()
selected_sample = generate_sample(
    df_status[(df_status['batch1_bottom']==1) | 
              (df_status['assignment_to_treatment']==0)], 'score_endo_covariates', 500, 5)

df_status_lower200 = df_status.loc[selected_sample, :]

fig = plt.figure(figsize=(12,7))
ax = plot_over_time(
    filter_data(df_status_lower200, assignment_to_treatment=1), col_fmt='relative_payments_by_{}', func=np.mean)
plot_over_time(
    filter_data(df_status_lower200, assignment_to_treatment=0), 
    col_fmt='relative_payments_by_{}', func=np.mean, ax=ax)

plt.ylim(0, 2)
yticks = ax.get_yticks()
for tick in yticks:
    ax.axhline(y=tick, linestyle='--', linewidth=0.5, color='gray')


plt.tight_layout()

plt.legend(['priority G1 - bottom 200 of initial G1', 'comparable control'], loc='upper left')
plt.ylabel('Mean Relative Payments')
plt.savefig('figs/figOA2.pdf')


def is_ever(df, cols, value, col_names=Likelihood.event_dates):
    global REF_DF 
    global REF_COLS 
    global REF_VALUE 
    
    REF_DF = df
    REF_COLS = cols
    REF_VALUE = value

    with Pool() as pool:
        list_is = pool.map(
            lambda i: REF_DF.loc[:, REF_COLS[:i+1]].apply(lambda r: np.isin(REF_VALUE, r), axis=1, raw=True), 
            list(range(len(REF_COLS))))
    this_df = pd.concat(list_is, axis=1)
    this_df.columns = col_names
    return 1 * this_df


plt.clf()
plt.figure(figsize=(10, 4.5))

# is ever valor by treatment group
ax = is_ever(
    filter_data(df_status, assignment_to_treatment=1), action_cols, 'valor').sum().plot()
is_ever(
    filter_data(df_status, assignment_to_treatment=0), action_cols, 'valor').sum().plot(
    ax=ax, linestyle='dashed')

# plt.ylabel('number of tax-payers issued notifications')
ax.set_yticks([0, 1000, 2000, 3000, 4000])
ax.set_yticklabels([0, 1000, 2000, 3000, 4000])
yticks = ax.get_yticks()
for tick in yticks:
    ax.axhline(y=tick, linestyle='--', linewidth=0.5, color='gray')
ax.set_ylim([0, 4500])

plt.legend(['treatment', 'control'], loc='center')

plt.tight_layout()

plt.savefig('figs/fig6a.pdf')


df_rec1_1 = is_ever(
    filter_data(df_status, assignment_to_treatment=1), action_nomedida_cols, 'rec1')
df_medida_1 = is_ever(
    filter_data(df_status, assignment_to_treatment=1), action_cols, 'medida')


df_rec1_0 = is_ever(
    filter_data(df_status, assignment_to_treatment=0), action_nomedida_cols, 'rec1')
df_medida_0 = is_ever(
    filter_data(df_status, assignment_to_treatment=0), action_cols, 'medida')


plt.clf()
# is ever rec1 by treatment group
ax = df_rec1_1.sum().plot()
df_rec1_0.sum().plot(ax=ax, linestyle='dashed')

yticks = ax.get_yticks()
for tick in yticks:
    ax.axhline(y=tick, linestyle='--', linewidth=0.5, color='gray')

ax.set_ylim([0, 3750])

plt.legend(['treatment', 'control'], loc='upper left')
plt.tight_layout()
plt.savefig('figs/fig6b.pdf')


plt.clf()
# is ever medida by treatment group
ax = is_ever(
    filter_data(df_status, assignment_to_treatment=1), action_cols, 'medida').sum().plot()
is_ever(
    filter_data(df_status, assignment_to_treatment=0), action_cols, 'medida').sum().plot(ax=ax, linestyle='dashed')

ax.set_ylim([0, 550])
ax.set_yticks([0,100,200,300,400,500])
yticks = ax.get_yticks()

for tick in yticks:
    ax.axhline(y=tick, linestyle='--', linewidth=0.5, color='gray')

plt.legend(['treatment', 'control'], loc='upper left')

plt.tight_layout()
plt.savefig('figs/fig6c.pdf')


quantiles = [i/100. for i in range(101)]
quantiles_due = df_status['total_due'].quantile(quantiles)
quantiles_due = list(quantiles_due)


total_collected = df_status.groupby('assignment_to_treatment')['payments_by_2021-09-06'].sum()


df_status.loc[df_status['assignment_to_treatment']==1, 
              'contribution_to_total_collected'] = df_status[payment_cols[-1]] / total_collected[1]
df_status.loc[df_status['assignment_to_treatment']==0, 
              'contribution_to_total_collected'] = df_status[payment_cols[-1]] / total_collected[0]


share_collected_by_quantile = pd.DataFrame(index=[100 * q for q in quantiles], columns=[0, 1])
for q, d in zip(quantiles, quantiles_due):
    this_df = filter_data(df_status, total_due=is_weakly_less_than(d))
    share_collected_by_quantile.loc[100*q] = this_df.groupby(
        'assignment_to_treatment')['contribution_to_total_collected'].sum()


share_collected_by_quantile.fillna(0, inplace=True) 

plt.clf()
ax = share_collected_by_quantile[[1]].plot(figsize=(10, 4.5))
share_collected_by_quantile[[0]].plot(ax=ax, linestyle="dashed")
plt.xlabel('quantile of tax-due')
plt.ylabel('share of taxes collected') 
plt.legend(['treatment', 'control'])
plt.ylim(0, .8)
plt.xlim(0, 100)
plt.tight_layout()
plt.savefig('figs/fig8.pdf')


sum_total_due = filter_data(df_status, assignment_to_treatment=1)['total_due'].sum()
share_total_due_by_quantile = pd.DataFrame(index=[int(100 * q) for q in quantiles], columns=['share total due'])
for q, d in zip(quantiles, quantiles_due):
    this_df = filter_data(df_status, assignment_to_treatment=1, total_due=is_weakly_less_than(d))
    share_total_due_by_quantile.loc[int(100*q)] = this_df['total_due'].sum() / sum_total_due

ax = share_total_due_by_quantile.plot(figsize=(10, 4.5))
plt.xlabel('quantile of tax-due')
plt.ylabel('share of taxes due')  
plt.ylim(0, 1)
plt.xlim(0, 100)

df_valor_1 = is_ever(
    filter_data(df_status, assignment_to_treatment=1), action_cols, 'valor')
df_valor_0 = is_ever(
    filter_data(df_status, assignment_to_treatment=0), action_cols, 'valor')

with open("figs/table4.txt", "w") as text_file:
    print("Valor (Control): " + str(df_valor_0.sum()[-1]), file=text_file)
    print("Valor (Treatment): " + str(df_valor_1.sum()[-1]), file=text_file)
    print("Rec 1 (Control): " + str(df_rec1_0.sum()[-1]), file=text_file)
    print("Rec 1 (Treatment): " + str(df_rec1_1.sum()[-1]), file=text_file)
    print("Processed Delinquents (Control): " + str(df_medida_0.sum()[-1]), file=text_file)
    print("Processed Delinquents (Treatment): " + str(df_medida_1.sum()[-1]), file=text_file)


np.random.seed(1234)
v = df_status[['score_endo_covariates', 'score_exo_covariates']].values
ix = np.random.randint(0,v.shape[1],df_status.shape[0])
df_status['effective_score_control'] = np.take_along_axis(v, ix[:,None], 1)
df_status.loc[
    df_status['assignment_to_treatment']==0, 'effective_score'] = df_status.loc[
    df_status['assignment_to_treatment']==0, 'effective_score_control']

priority_status_cols = [c for c in df_status.columns if c.find('priority_by')>-1]

# is ever G1
is_ever_G1 = is_ever(df_status, priority_status_cols, 'G1', col_names=priority_status_cols)
is_ever_G1 = is_ever_G1['priority_by_2021-05-31']
df_status['is_ever_G1'] = is_ever_G1



### Table 6

quantiles_to_use = [j/50 for j in range(1,51)]
list_quantiles = [0] + list(df_status['effective_score'].quantile(quantiles_to_use))
list_is_ever_G1_by_quantile = []
for lower, upper in zip(list_quantiles[0:-1], list_quantiles[1:]):
    list_is_ever_G1_by_quantile.append(
        filter_data(df_status, 
                    effective_score=is_between(lower,upper),
                    assignment_to_treatment=1,
                    batch1_bottom=0)['is_ever_G1'].mean())

first_non0_score_quantile = next((i for i, x in enumerate(list_is_ever_G1_by_quantile) if x), None)

df_status['relative_total_due_Q2'] = df_status['total_due_Q2'].divide(
    df_status['Q1_total_due'])

df_status['is_ever_G1_control'] = (
    df_status['assignment_to_treatment']==0) & (
    df_status['effective_score']>list_quantiles[first_non0_score_quantile])

df_status['potential_G1'] = df_status['effective_score']>list_quantiles[first_non0_score_quantile]
df_status['potential_G1_treatment'] = df_status['potential_G1'] * df_status['assignment_to_treatment']
formula = 'relative_total_due_Q2 ~ 1 + assignment_to_treatment + potential_G1 + potential_G1_treatment'
model = OLS.from_formula(formula, data=df_status)
res = model.fit(cov_type='robust')

df_results = res.params.to_frame('Parameter 1')
df_results['Standard Error 1'] = res.std_errors

# Print Table 6
with open("figs/table6.txt", "w") as text_file:
    text_file.write(df_results.to_latex(float_format="%.3f"))

def find_payment_within_n(df, date, n):
    for j in range(n, len(Likelihood.event_dates)):
        if (date==Likelihood.event_dates[j-n]):
            return df[payment_cols[j]]


def find_payment_at_n(df, date, n):
    for j in range(n, len(Likelihood.event_dates)):
        if (date==Likelihood.event_dates[j-n]):
            return df[fwdiff_payment_cols[j]]


def first_time_event(df, varname, value, cols, ne=False):
    df[varname] = np.nan
    if ne:
        df.loc[
            df[cols[0]]!=value, varname] = Likelihood.event_dates[0]
        for j in range(1, len(Likelihood.event_dates)):
            df.loc[(df[cols[j-1]]==value) &
                   (df[cols[j]]!=value) &
                   (df[varname].isnull()),
                   varname] = Likelihood.event_dates[j]
    else:
        df.loc[
            df[cols[0]]==value, varname] = Likelihood.event_dates[0]
        for j in range(1, len(Likelihood.event_dates)):
            df.loc[(df[cols[j-1]]!=value) &
                   (df[cols[j]]==value) &
                   (df[varname].isnull()),
                   varname] = Likelihood.event_dates[j]


first_time_event(df_status, 'first_time_payment', 0, payment_cols, ne=True)
first_time_event(df_status, 'first_time_rec1', "rec1", action_cols)
first_time_event(df_status, 'first_time_medida', "medida", action_cols)

df_status['medida_date'] = pd.to_datetime(df_status['medida_fecha'],format="%d/%m/%Y")
df_status['promise_date'] = pd.to_datetime(df_status['fecha_limite'], format="%d/%b/%y")

df_status.loc[
    df_status['assignment_to_treatment']==1,
    'time_from_promise_to_medida'] = df_status['medida_date']-df_status['promise_date']
df_status['days_from_promise_to_medida'] = df_status['time_from_promise_to_medida'].dt.days


df_status['days_from_G1_to_promise'].describe()

G1_cutoff_date = pd.to_datetime(datetime.date(2021, 6, 1))
promise_cutoff_date = pd.to_datetime(datetime.date(2021, 9, 6))

df_status_Q1G1 = df_status.loc[(pd.to_datetime(df_status['first_time_G1']) < G1_cutoff_date) &
                               (pd.to_datetime(df_status['promise_date']) <= promise_cutoff_date)]

medidas = set(df_status_Q1G1.loc[df_status_Q1G1[action_cols[-1]]=="medida"].index)
no_second_medida_promise = set(df_status_Q1G1.loc[df_status_Q1G1['fecha_limite_2'].isna()].index)
no_payments = set(df_status_Q1G1.loc[df_status_Q1G1[payment_cols[-1]]==0].index)
first_payment_after_medida = set(df_status_Q1G1.loc[
    df_status_Q1G1['first_time_payment']>=df_status_Q1G1['first_time_medida']].index)
no_medida_no_payment = set(df_status_Q1G1[
    (df_status_Q1G1[action_cols[-1]]!="medida") & (df_status_Q1G1[payment_cols[-1]]==0)].index)

index_to_include = medidas.intersection(
    no_second_medida_promise.union(first_payment_after_medida).union(no_payments))
index_to_include = index_to_include.union(no_medida_no_payment)

df_status_Q1G1_promisemedida = df_status_Q1G1.loc[list(index_to_include)]
df_status_Q1G1_promisemedida.sort_index(inplace=True)


def lower_bound_time_delta(df, var, format=None):
    return pd.to_datetime('2021-09-06', format="%Y-%m-%d") - pd.to_datetime(df[var], format=format)


df_status_Q1G1_promisemedida['temp_date_promise_to_medida'] = lower_bound_time_delta(
    df_status_Q1G1_promisemedida, var='fecha_limite', format="%d/%b/%y")
df_status_Q1G1_promisemedida['days_from_promise_to_medida'].fillna(
    df_status_Q1G1_promisemedida['temp_date_promise_to_medida'].dt.days, inplace=True)


with open("numbers/sec4.txt", "w") as text_file:
    print("G1s issued for Q1:" + str(
        len(df_status_Q1G1)
         ), file=text_file)
    print("G1s issued for Q1 that became medida before 06-Sep:" + str(sum(~df_status_Q1G1['first_time_medida'].isnull()))
          , file=text_file)
    print("Average days from medida to promise for Q1 G1s: " +
          str(np.mean(df_status_Q1G1_promisemedida['days_from_promise_to_medida']))
          , file=text_file)
    print("G1s issued in Q1 that should have become medida but did not: " + str(
        len(df_status_Q1G1[
            (df_status_Q1G1['time_from_promise_to_medida'].isnull()) & (df_status_Q1G1[relative_payment_cols[-1]]==0)]))
          , file=text_file)
